# Import libraries and load the dataset
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Read the Kimchi CSV into a DataFrame; note the path is machine-specific.
dataset = pd.read_csv(
    filepath_or_buffer="/Users/Desktop/beathika treatment/Kimchi_dataset.csv",
)
# Bare expression: echoes the column labels as the cell output.
dataset.columns
Index(['Date', 'Price', 'Total Volume', 'Total Boxes', 'Small Boxes',
'Large Boxes', 'XLarge Boxes', 'Region'],
dtype='object')
# Bare expression: displays a preview of the DataFrame (the table that follows).
dataset
|  | Date | Price | Total Volume | Total Boxes | Small Boxes | Large Boxes | XLarge Boxes | Region |
|---|---|---|---|---|---|---|---|---|
| 0 | 3/25/2018 | 1.71 | 2321.82 | 2006.46 | 1996.46 | 10.00 | 0.0 | Seoul |
| 1 | 3/18/2018 | 1.66 | 3154.45 | 2580.60 | 2577.27 | 3.33 | 0.0 | Seoul |
| 2 | 3/11/2018 | 1.68 | 2570.52 | 2209.29 | 2209.29 | 0.00 | 0.0 | Seoul |
| 3 | 3/4/2018 | 1.48 | 3851.30 | 3242.98 | 3239.65 | 3.33 | 0.0 | Seoul |
| 4 | 2/25/2018 | 1.56 | 5356.63 | 4007.48 | 4007.48 | 0.00 | 0.0 | Seoul |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 643 | 2/4/2018 | 1.63 | 17074.83 | 13498.67 | 13066.82 | 431.85 | 0.0 | Boryeong |
| 644 | 1/28/2018 | 1.71 | 13888.04 | 9264.84 | 8940.04 | 324.80 | 0.0 | Boryeong |
| 645 | 1/21/2018 | 1.87 | 13766.76 | 9394.11 | 9351.80 | 42.31 | 0.0 | Boryeong |
| 646 | 1/14/2018 | 1.93 | 16205.22 | 10969.54 | 10919.54 | 50.00 | 0.0 | Boryeong |
| 647 | 1/7/2018 | 1.62 | 17489.58 | 12014.15 | 11988.14 | 26.01 | 0.0 | Boryeong |
648 rows × 8 columns
from pandas_profiling import ProfileReport
# Construct a pandas_profiling ProfileReport from the loaded DataFrame.
prof = ProfileReport(dataset)
# Bare expression: presumably renders the report as the notebook cell output.
prof